library(tidyverse)
library(ggstatsplot)
library(gapminder)
library(ggforce)
library(Hmisc)
library(plotly)
library(patchwork)
# Take the gapminder data set and tabulate its rows by continent.
df <- gapminder::gapminder
table(df[["continent"]])
##
## Africa Americas Asia Europe Oceania
## 624 300 396 360 24
# Restrict the data to 2007, leave out Oceania, and keep only the columns the
# plots below need.
df <- df %>%
  filter(year == 2007, !(continent %in% c("Oceania"))) %>%
  select(country, continent, lifeExp) %>%
  # Sort the continent levels by median life expectancy, then drop the level
  # that no longer has any rows.
  mutate(continent = droplevels(reorder(continent, lifeExp, median)))
table(df[["continent"]])
##
## Africa Asia Americas Europe
## 52 33 25 30
# Box plots with translucent violins drawn on top: life expectancy by
# continent, filled by continent (legend taken from the box plot layer only).
p1 <- ggplot(
  data = df,
  mapping = aes(x = continent, y = lifeExp, fill = continent)
) +
  geom_boxplot(show.legend = TRUE, alpha = 0.75) +
  geom_violin(show.legend = FALSE, alpha = 0.25) +
  labs(
    title = "Life Expectancy by Continent",
    x = "Continent",
    y = "Life expectancy",
    caption = "Source: gapminder data set"
  ) +
  scale_fill_brewer(palette = "Set1") +
  theme_minimal(base_size = 14, base_family = "Times New Roman") +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    axis.title.x = element_text(face = "italic"),
    axis.title.y = element_text(face = "italic"),
    plot.caption = element_text(face = "italic")
  ) +
  # Background colour applied through ggpubr.
  ggpubr::bgcolor("#FEFEFA")
p1

# Number of rows (countries) per continent, with columns `continent` and `n`;
# used to build the "n = ..." x-axis labels of the plots below.
# count(continent) yields the same rows as group_by() + count() but returns an
# ungrouped tibble, so no grouping leaks into later operations.
continent_count <- df %>%
  count(continent)
# Box plots with centred dot plots of the individual observations. The x-axis
# labels show each continent's count from `continent_count`, and a dashed green
# line marks the overall mean life expectancy.
p2 <- df %>%
  ggplot(aes(x = continent, y = lifeExp, fill = continent)) +
  geom_boxplot(alpha = 0.75, show.legend = TRUE) +
  # geom_violin(alpha = 0.25, show.legend = FALSE, adjust = 0.75) +
  geom_dotplot(
    binaxis = "y",
    binwidth = 1.25,
    stackdir = "center",
    show.legend = FALSE
  ) +
  scale_fill_brewer(palette = "Set1") +
  # Labels are named by continent so they are matched to the axis breaks by
  # name instead of by position. paste0() avoids the stray spaces that
  # paste()'s default " " separator would add around the pieces.
  scale_x_discrete(
    labels = setNames(
      paste0(continent_count$continent, "\nn = ", continent_count$n),
      as.character(continent_count$continent)
    )
  ) +
  labs(
    x = "Continent",
    y = "Life expectancy",
    title = "Life Expectancy by Continent",
    caption = "Source: gapminder data set"
  ) +
  # Reference line at the overall mean.
  # NOTE: from ggplot2 3.4.0 on, `size` for line geoms is deprecated in favour
  # of `linewidth`; `size` is kept here so older ggplot2 versions still work.
  geom_hline(
    yintercept = mean(df$lifeExp, na.rm = TRUE),
    linetype = "dashed",
    color = "green",
    size = 1
  ) +
  theme_minimal(base_family = "Times New Roman", base_size = 14) +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.title.x = element_text(face = "italic"),
    axis.title.y = element_text(face = "italic"),
    plot.caption = element_text(face = "italic")
  ) +
  ggpubr::bgcolor("#FEFEFA")
p2

Notes:
- ggbetweenstats() is used for creating boxplots as well as computing
statistical tests (such as Welch's t-test, Student's t-test, ANOVA,
Kruskal-Wallis, Mann-Whitney)
- the package also computes post-hoc tests (if needed) such as Dunn
test for the Kruskal-Wallis test and displays pairwise p-values
# Between-group comparison of life expectancy across continents with
# ggbetweenstats (type = "np"), restyled with a minimal theme and the same
# background colour as the other plots.
p3 <- ggbetweenstats(
  df,
  continent,
  lifeExp,
  palette = "Set1",
  type = "np"
) +
  theme_minimal(base_size = 12) +
  ggpubr::bgcolor("#FEFEFA")
p3

Notes:
- notch=TRUE draws a notch that gives a roughly 95% confidence interval
around the median
- the notch extends to median +/- 1.58 * (Q3 - Q1) / sqrt(n), i.e.
1.58 * IQR / sqrt(n)
- adding information on the X axis labels about the quantity of each
category is possible by grouping the data (to get the count of each
category) and using paste() function in the scale_x_discrete()
function
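- geom_sina() is an alternative to geom_point() or geom_jitter(): it
jitters the points horizontally in proportion to the density of the data,
so the points fill the violin shape instead of overlapping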
- stat_summary() can draw both the mean and the error bars around the
mean (here a bootstrap confidence interval computed by mean_cl_boot)
# Notched box plots, a faint violin outline and sina points coloured by
# continent, plus the group mean with bootstrap error bars (mean_cl_boot) in
# blue and a dashed green line at the overall mean. The `text` aesthetic on the
# points is not drawn by ggplot2; it supplies the tooltip for ggplotly() below.
p4 <- ggplot(df, aes(x = continent, y = lifeExp)) +
  # Box plot outliers are drawn white with size 0 so that they do not
  # duplicate the sina points.
  geom_boxplot(
    alpha = 0.75,
    notch = TRUE,
    fill = "white",
    outlier.colour = "white",
    outlier.size = 0
  ) +
  # Labels are named by continent so they are matched to the axis breaks by
  # name instead of by position. paste0() avoids the stray spaces that
  # paste()'s default " " separator would add around the pieces.
  scale_x_discrete(
    labels = setNames(
      paste0(continent_count$continent, "\nn = ", continent_count$n),
      as.character(continent_count$continent)
    )
  ) +
  geom_violin(alpha = 0.1) +
  ggforce::geom_sina(
    size = 1.5,
    aes(
      color = continent,
      text = paste0(
        "Continent: ", continent,
        "\nCountry: ", country,
        "\nLife expectancy: ", lifeExp
      )
    )
  ) +
  # Group mean as a large point ...
  stat_summary(
    fun.data = mean_cl_boot,
    geom = "point",
    color = "blue",
    size = 5,
    show.legend = FALSE
  ) +
  # ... and its bootstrap interval as error bars.
  # NOTE: from ggplot2 3.4.0 on, `size` for line geoms is deprecated in favour
  # of `linewidth`; `size` is kept here so older ggplot2 versions still work.
  stat_summary(
    fun.data = mean_cl_boot,
    geom = "errorbar",
    color = "blue",
    size = 1,
    width = 0.4
  ) +
  labs(
    x = "Continent",
    y = "Life expectancy",
    title = "Life Expectancy by Continent",
    caption = "Source: gapminder data set"
  ) +
  # scale_fill_brewer(palette = "Set1") +
  scale_color_brewer(palette = "Set1") +
  # Reference line at the overall mean.
  geom_hline(
    yintercept = mean(df$lifeExp, na.rm = TRUE),
    linetype = "dashed",
    color = "green",
    size = 1
  ) +
  theme_bw(base_size = 14, base_family = "Times New Roman") +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.title.x = element_text(face = "italic"),
    axis.title.y = element_text(face = "italic"),
    plot.caption = element_text(face = "italic")
  ) +
  ggpubr::bgcolor("#FEFEFA")
p4

# Interactive version; the tooltip shows only the `text` aesthetic.
ggplotly(p4, tooltip = "text")